PUBPOL543-Plot1: univariate - Frequency Table of Race in the United States

Menghan Zhai

Load ACS data

library(foreign)
# Location of the Stata (.dta) file holding the ACS extract.
link <- "https://github.com/G7-PUBPOL-543/ACS/raw/main/ManipulatedACS_V2.dta"
# Read the .dta file from that location into ACSData.
ACSData <- read.dta(file = link)

check variable: race

# List every column available in the ACS data.
colnames(ACSData)
##   [1] "hhwt"       "numprec"    "hhtype"     "adjust"     "region"    
##   [6] "statefip"   "countyfip"  "metro"      "city"       "citypop"   
##  [11] "ownershp"   "mortgage"   "acrehous"   "mortamt1"   "mortamt2"  
##  [16] "taxincl"    "insincl"    "propinsr"   "owncost"    "rentgrs"   
##  [21] "costelec"   "costgas"    "costwatr"   "costfuel"   "hhincome"  
##  [26] "foodstmp"   "valueh"     "builtyr2"   "unitsstr"   "phone"     
##  [31] "vehicles"   "ssmc"       "perwt"      "famsize"    "sex"       
##  [36] "age"        "marst"      "birthyr"    "marrno"     "yrmarr"    
##  [41] "race"       "raced"      "hispan"     "hispand"    "bpl"       
##  [46] "bpld"       "citizen"    "yrnatur"    "yrimmig"    "yrsusa1"   
##  [51] "language"   "speakeng"   "tribe"      "racamind"   "racasian"  
##  [56] "racblk"     "racpacis"   "racwht"     "racother"   "hcovany"   
##  [61] "hcovpriv"   "hinsemp"    "hinspur"    "hinstri"    "hcovpub"   
##  [66] "hinscaid"   "hinscare"   "hinsva"     "hinsihs"    "school"    
##  [71] "educ"       "educd"      "schltype"   "degfield"   "degfieldd" 
##  [76] "degfield2"  "degfield2d" "empstat"    "empstatd"   "labforce"  
##  [81] "wkswork1"   "wkswork2"   "uhrswork"   "inctot"     "ftotinc"   
##  [86] "incwage"    "incss"      "incwelfr"   "incinvst"   "incretir"  
##  [91] "incsupp"    "incother"   "incearn"    "poverty"    "diffrem"   
##  [96] "diffphys"   "diffmob"    "diffcare"   "diffsens"   "diffeye"   
## [101] "diffhear"   "pwstate2"   "pwcounty"   "tranwork"   "carpool"   
## [106] "riders"     "trantime"   "departs"    "arrives"
# Peek at the first 20 values of the race variable.
head(ACSData[["race"]], n = 20)
##  [1] white                            white                           
##  [3] white                            black/african american/negro    
##  [5] white                            other race, nec                 
##  [7] white                            white                           
##  [9] white                            white                           
## [11] white                            white                           
## [13] white                            two major races                 
## [15] white                            white                           
## [17] black/african american/negro     other race, nec                 
## [19] american indian or alaska native black/african american/negro    
## 9 Levels: white ... three or more major races

get frequency table of race: RaceFreq

# Absolute counts per race category.
# useNA = "ifany" adds an NA cell only when missing values are present; this
# states the intent directly instead of excluding a made-up sentinel label.
RaceCount <- table(ACSData$race,
                   useNA = "ifany")
RaceCount
## 
##                            white     black/african american/negro 
##                            87159                            14408 
## american indian or alaska native                          chinese 
##                              915                             1595 
##                         japanese  other asian or pacific islander 
##                              289                             5246 
##                  other race, nec                  two major races 
##                             6022                             3579 
##        three or more major races 
##                              494
# Relative frequencies, scaled to percentages (0-100).
RaceProptoplot <- 100 * prop.table(RaceCount)
RaceProptoplot
## 
##                            white     black/african american/negro 
##                       72.8102784                       12.0360547 
## american indian or alaska native                          chinese 
##                        0.7643663                        1.3324200 
##                         japanese  other asian or pacific islander 
##                        0.2414228                        4.3823669 
##                  other race, nec                  two major races 
##                        5.0306164                        2.9898001 
##        three or more major races 
##                        0.4126743
# Convert the percentage table into a data frame and label its two columns:
# "race" (the category) and "pct" (its share in percent).
RaceFreq <- setNames(as.data.frame(RaceProptoplot),
                     c("race", "pct"))

RaceFreq
##                               race        pct
## 1                            white 72.8102784
## 2     black/african american/negro 12.0360547
## 3 american indian or alaska native  0.7643663
## 4                          chinese  1.3324200
## 5                         japanese  0.2414228
## 6  other asian or pacific islander  4.3823669
## 7                  other race, nec  5.0306164
## 8                  two major races  2.9898001
## 9        three or more major races  0.4126743

Time to plot!

library(ggplot2)

Order RaceFreq

# Sort the rows by ascending percentage.
RaceFreq <- RaceFreq[order(RaceFreq[["pct"]]), , drop = FALSE]

RaceFreq
##                               race        pct
## 5                         japanese  0.2414228
## 9        three or more major races  0.4126743
## 3 american indian or alaska native  0.7643663
## 4                          chinese  1.3324200
## 8                  two major races  2.9898001
## 6  other asian or pacific islander  4.3823669
## 7                  other race, nec  5.0306164
## 2     black/african american/negro 12.0360547
## 1                            white 72.8102784
# Keep the race categories in ascending-percentage order; this vector is
# passed to scale_x_discrete() below to fix the axis order.
RaceOrd <- RaceFreq[["race"]][order(RaceFreq[["pct"]])]

base

# Canvas for the univariate plot: race categories on x, percentage on y.
base <- ggplot(data = RaceFreq,
               mapping = aes(x = race, y = pct)) +
  theme_light() +                      # light background theme
  scale_x_discrete(limits = RaceOrd)   # apply the saved category order

base

plot1: + geom bar

# One bar per category; geom_col() draws bar heights from the y values as given.
plot1 <- base + geom_col()

plot1

plot2: + title

# Text elements of the univariate plot, kept together in one list.
TitleRace <- list(
  Title = "Frequency Table of Race in the United States",
  SubTi = NULL,
  XTi = "Race",
  YTi = "Percentage",
  Sou = "Source: 2019 American Community Survey (ACS) Data"
)
# Attach title, axis titles and caption to the bar chart.
plot2 <- plot1 +
  labs(title = TitleRace[["Title"]],
       subtitle = TitleRace[["SubTi"]],
       x = TitleRace[["XTi"]],
       y = TitleRace[["YTi"]],
       caption = TitleRace[["Sou"]])
plot2

plot3: customize Y axis

library(scales)
# Custom y axis: fixed 0-80 range, hand-picked breaks, "%" appended to labels.
plot3 <- plot2 +
  scale_y_continuous(
    breaks = c(5, 10, 20, 40, 60, 80),
    limits = c(0, 80),
    labels = unit_format(suffix = "%")
  )
plot3

plot4: Title/subtitle positions:

# Center the title and right-align the caption, then flip the axes so the
# race labels are fully shown.
plot4 <- plot3 +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1)) +
  coord_flip()

plot4

plot5: Annotating the bars:

# Preview the bar labels: percentage rounded to two decimals plus "%".
paste0(round(RaceFreq[["pct"]], digits = 2), "%")
## [1] "0.24%"  "0.41%"  "0.76%"  "1.33%"  "2.99%"  "4.38%"  "5.03%"  "12.04%"
## [9] "72.81%"
# Bar labels: percentage rounded to two decimals with a "%" suffix.
LABELS <- paste0(round(RaceFreq$pct, 2), "%")
# Build the label inside aes() from the plotted data, so every label is tied
# to its own row rather than to the position of an external vector.
plot5 <- plot4 + geom_text(vjust = 0,
                           hjust = -0.1,   # push the text just past the bar end
                           size = 3,
                           aes(y = pct,
                               label = paste0(round(pct, 2), "%")))
plot5

Final Univariate Plot

# Render the finished univariate chart.
print(plot5)

PUBPOL543-Plot2: bivariate - Contingency Tables of Race and Health Insurance Coverage in the United States

Menghan Zhai

I’ll explore Race and Health Insurance Coverage in the United States in this plot. As these two variables are both categorical, I’ll use contingency tables to organize the relationship.

# Show the first five rows of the data.
head(ACSData, n = 5)
##   hhwt numprec hhtype   adjust                 region statefip countyfip
## 1   58       1    n/a 1.010145 east south central div  alabama         0
## 2   64       1    n/a 1.010145 east south central div  alabama         0
## 3   54       1    n/a 1.010145 east south central div  alabama         0
## 4   75       1    n/a 1.010145 east south central div  alabama         0
## 5   26       1    n/a 1.010145 east south central div  alabama         3
##                                                                        metro
## 1 in metropolitan area: central/principal city status indeterminable (mixed)
## 2 in metropolitan area: central/principal city status indeterminable (mixed)
## 3                                                   not in metropolitan area
## 4                            in metropolitan area: in central/principal city
## 5 in metropolitan area: central/principal city status indeterminable (mixed)
##                                       city citypop ownershp mortgage acrehous
## 1 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 2 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 3 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 4 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 5 not in identifiable city (or size group)       0      n/a      n/a      n/a
##   mortamt1 mortamt2 taxincl insincl propinsr owncost rentgrs costelec costgas
## 1        0        0     n/a     n/a        0   99999       0        0       0
## 2        0        0     n/a     n/a        0   99999       0        0       0
## 3        0        0     n/a     n/a        0   99999       0        0       0
## 4        0        0     n/a     n/a        0   99999       0        0       0
## 5        0        0     n/a     n/a        0   99999       0        0       0
##   costwatr costfuel hhincome foodstmp  valueh builtyr2 unitsstr phone vehicles
## 1        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 2        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 3        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 4        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 5        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
##                                           ssmc perwt                 famsize
## 1 households without a same-sex married couple    58 1 family member present
## 2 households without a same-sex married couple    64 1 family member present
## 3 households without a same-sex married couple    54 1 family member present
## 4 households without a same-sex married couple    75 1 family member present
## 5 households without a same-sex married couple    26 1 family member present
##      sex age                marst birthyr         marrno yrmarr
## 1   male  37 never married/single    1982 not applicable      0
## 2   male  19 never married/single    2000 not applicable      0
## 3 female  87              widowed    1932   married once   1953
## 4   male  26 never married/single    1993 not applicable      0
## 5   male  49 never married/single    1970 not applicable      0
##                           race                        raced       hispan
## 1                        white                        white not hispanic
## 2                        white                        white not hispanic
## 3                        white                        white not hispanic
## 4 black/african american/negro black/african american/negro not hispanic
## 5                        white                        white not hispanic
##        hispand            bpl           bpld citizen yrnatur yrimmig yrsusa1
## 1 not hispanic        indiana        indiana     n/a     n/a       0       0
## 2 not hispanic        alabama        alabama     n/a     n/a       0       0
## 3 not hispanic        alabama        alabama     n/a     n/a       0       0
## 4 not hispanic north carolina north carolina     n/a     n/a       0       0
## 5 not hispanic        alabama        alabama     n/a     n/a       0       0
##   language                 speakeng                   tribe racamind racasian
## 1  english yes, speaks only english not applicable or blank       no       no
## 2  english yes, speaks only english not applicable or blank       no       no
## 3  english yes, speaks only english not applicable or blank       no       no
## 4  english yes, speaks only english not applicable or blank       no       no
## 5  english yes, speaks only english not applicable or blank       no       no
##   racblk racpacis racwht racother                        hcovany
## 1     no       no    yes       no   no health insurance coverage
## 2     no       no    yes       no with health insurance coverage
## 3     no       no    yes       no with health insurance coverage
## 4    yes       no     no       no   no health insurance coverage
## 5     no       no    yes       no with health insurance coverage
##                                    hcovpriv
## 1 without private health insurance coverage
## 2    with private health insurance coverage
## 3    with private health insurance coverage
## 4 without private health insurance coverage
## 5 without private health insurance coverage
##                                hinsemp                          hinspur
## 1  no insurance through employer/union  no insurance purchased directly
## 2 has insurance through employer/union  no insurance purchased directly
## 3  no insurance through employer/union has insurance purchased directly
## 4  no insurance through employer/union  no insurance purchased directly
## 5  no insurance through employer/union  no insurance purchased directly
##                        hinstri                                  hcovpub
## 1 no insurance through tricare without public health insurance coverage
## 2 no insurance through tricare without public health insurance coverage
## 3 no insurance through tricare    with public health insurance coverage
## 4 no insurance through tricare without public health insurance coverage
## 5 no insurance through tricare    with public health insurance coverage
##                         hinscaid hinscare                  hinsva
## 1  no insurance through medicaid       no no insurance through va
## 2  no insurance through medicaid       no no insurance through va
## 3 has insurance through medicaid      yes no insurance through va
## 4  no insurance through medicaid       no no insurance through va
## 5 has insurance through medicaid       no no insurance through va
##                                      hinsihs            school
## 1 no insurance through indian health service no, not in school
## 2 no insurance through indian health service    yes, in school
## 3 no insurance through indian health service no, not in school
## 4 no insurance through indian health service no, not in school
## 5 no insurance through indian health service no, not in school
##                  educ                                        educd
## 1   1 year of college 1 or more years of college credit, no degree
## 2   1 year of college 1 or more years of college credit, no degree
## 3            grade 12                  regular high school diploma
## 4             grade 9                                      grade 9
## 5 grade 5, 6, 7, or 8                                      grade 8
##        schltype degfield degfieldd degfield2 degfield2d            empstat
## 1  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 2 public school      n/a       n/a       n/a        n/a           employed
## 3  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 4  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 5  not enrolled      n/a       n/a       n/a        n/a           employed
##             empstatd                   labforce wkswork1    wkswork2 uhrswork
## 1 not in labor force no, not in the labor force       16 14-26 weeks       55
## 2            at work    yes, in the labor force       26 14-26 weeks       40
## 3 not in labor force no, not in the labor force        0         n/a      n/a
## 4 not in labor force no, not in the labor force       48 48-49 weeks       40
## 5            at work    yes, in the labor force       11  1-13 weeks       21
##   inctot ftotinc incwage incss incwelfr incinvst incretir incsupp incother
## 1  15800 9999999   15800     0        0        0        0       0        0
## 2    800 9999999     800     0        0        0        0       0        0
## 3  13800 9999999       0 13800        0        0        0       0        0
## 4  23500 9999999   23500     0        0        0        0       0        0
## 5  10700 9999999    1700     0        0        0        0    9000        0
##   incearn poverty                  diffrem                  diffphys
## 1   15800       0 has cognitive difficulty  no ambulatory difficulty
## 2     800       0  no cognitive difficulty  no ambulatory difficulty
## 3       0       0 has cognitive difficulty has ambulatory difficulty
## 4   23500       0  no cognitive difficulty  no ambulatory difficulty
## 5    1700      81 has cognitive difficulty has ambulatory difficulty
##                             diffmob diffcare                        diffsens
## 1  no independent living difficulty       no no vision or hearing difficulty
## 2  no independent living difficulty       no no vision or hearing difficulty
## 3 has independent living difficulty      yes no vision or hearing difficulty
## 4  no independent living difficulty       no no vision or hearing difficulty
## 5 has independent living difficulty      yes no vision or hearing difficulty
##   diffeye diffhear pwstate2 pwcounty            tranwork      carpool
## 1      no       no      n/a        0                 n/a          n/a
## 2      no       no  alabama        0 auto, truck, or van drives alone
## 3      no       no      n/a        0                 n/a          n/a
## 4      no       no      n/a        0                 n/a          n/a
## 5      no       no  alabama        3 auto, truck, or van     carpools
##         riders trantime departs arrives
## 1          n/a        0       0       0
## 2 drives alone       15     902     919
## 3          n/a        0       0       0
## 4          n/a        0       0       0
## 5            3       15     702     719
# Inspect hcovany: whether the person has health insurance coverage or not.
head(ACSData[["hcovany"]], n = 20)
##  [1] no health insurance coverage   with health insurance coverage
##  [3] with health insurance coverage no health insurance coverage  
##  [5] with health insurance coverage with health insurance coverage
##  [7] with health insurance coverage with health insurance coverage
##  [9] with health insurance coverage with health insurance coverage
## [11] with health insurance coverage with health insurance coverage
## [13] no health insurance coverage   with health insurance coverage
## [15] with health insurance coverage with health insurance coverage
## [17] no health insurance coverage   with health insurance coverage
## [19] with health insurance coverage with health insurance coverage
## Levels: no health insurance coverage with health insurance coverage
# Cross-tabulate race (rows) against health insurance coverage (columns).
# The outer parentheses print the table as it is assigned.
(RaceHIC <- table(ACSData[["race"]], ACSData[["hcovany"]]))
##                                   
##                                    no health insurance coverage
##   white                                                    7041
##   black/african american/negro                             1541
##   american indian or alaska native                          170
##   chinese                                                    91
##   japanese                                                   19
##   other asian or pacific islander                           327
##   other race, nec                                          1291
##   two major races                                           284
##   three or more major races                                  33
##                                   
##                                    with health insurance coverage
##   white                                                     80118
##   black/african american/negro                              12867
##   american indian or alaska native                            745
##   chinese                                                    1504
##   japanese                                                    270
##   other asian or pacific islander                            4919
##   other race, nec                                            4731
##   two major races                                            3295
##   three or more major races                                   461
# Column-wise shares: each cell divided by its column total (margin = 2),
# rounded to three decimals. The outer parentheses print the result.
library(magrittr)
(RaceHIC_mgCol <- round(prop.table(RaceHIC, margin = 2), digits = 3))
##                                   
##                                    no health insurance coverage
##   white                                                   0.652
##   black/african american/negro                            0.143
##   american indian or alaska native                        0.016
##   chinese                                                 0.008
##   japanese                                                0.002
##   other asian or pacific islander                         0.030
##   other race, nec                                         0.120
##   two major races                                         0.026
##   three or more major races                               0.003
##                                   
##                                    with health insurance coverage
##   white                                                     0.736
##   black/african american/negro                              0.118
##   american indian or alaska native                          0.007
##   chinese                                                   0.014
##   japanese                                                  0.002
##   other asian or pacific islander                           0.045
##   other race, nec                                           0.043
##   two major races                                           0.030
##   three or more major races                                 0.004
# Long-format data frame of the counts: one row per race x coverage pair.
RaceHIC_DF <- setNames(as.data.frame(RaceHIC),
                       c("race", "HICstatus", "counts"))

RaceHIC_DF
##                                race                      HICstatus counts
## 1                             white   no health insurance coverage   7041
## 2      black/african american/negro   no health insurance coverage   1541
## 3  american indian or alaska native   no health insurance coverage    170
## 4                           chinese   no health insurance coverage     91
## 5                          japanese   no health insurance coverage     19
## 6   other asian or pacific islander   no health insurance coverage    327
## 7                   other race, nec   no health insurance coverage   1291
## 8                   two major races   no health insurance coverage    284
## 9         three or more major races   no health insurance coverage     33
## 10                            white with health insurance coverage  80118
## 11     black/african american/negro with health insurance coverage  12867
## 12 american indian or alaska native with health insurance coverage    745
## 13                          chinese with health insurance coverage   1504
## 14                         japanese with health insurance coverage    270
## 15  other asian or pacific islander with health insurance coverage   4919
## 16                  other race, nec with health insurance coverage   4731
## 17                  two major races with health insurance coverage   3295
## 18        three or more major races with health insurance coverage    461
# Append the column shares. The share table is converted the same way as the
# count table, and its values land in the third column, named "Freq".
RaceHIC_DF[["pctCol"]] <- as.data.frame(RaceHIC_mgCol)[["Freq"]]

RaceHIC_DF
##                                race                      HICstatus counts
## 1                             white   no health insurance coverage   7041
## 2      black/african american/negro   no health insurance coverage   1541
## 3  american indian or alaska native   no health insurance coverage    170
## 4                           chinese   no health insurance coverage     91
## 5                          japanese   no health insurance coverage     19
## 6   other asian or pacific islander   no health insurance coverage    327
## 7                   other race, nec   no health insurance coverage   1291
## 8                   two major races   no health insurance coverage    284
## 9         three or more major races   no health insurance coverage     33
## 10                            white with health insurance coverage  80118
## 11     black/african american/negro with health insurance coverage  12867
## 12 american indian or alaska native with health insurance coverage    745
## 13                          chinese with health insurance coverage   1504
## 14                         japanese with health insurance coverage    270
## 15  other asian or pacific islander with health insurance coverage   4919
## 16                  other race, nec with health insurance coverage   4731
## 17                  two major races with health insurance coverage   3295
## 18        three or more major races with health insurance coverage    461
##    pctCol
## 1   0.652
## 2   0.143
## 3   0.016
## 4   0.008
## 5   0.002
## 6   0.030
## 7   0.120
## 8   0.026
## 9   0.003
## 10  0.736
## 11  0.118
## 12  0.007
## 13  0.014
## 14  0.002
## 15  0.045
## 16  0.043
## 17  0.030
## 18  0.004

a grouped chart

library(ggplot2)
# Grouped chart: coverage status on x, counts on y, one dodged bar per race.
base1 <- ggplot(data = RaceHIC_DF,
                mapping = aes(x = HICstatus,
                              y = counts,
                              fill = race))

barGC <- base1 + geom_col(position = "dodge")
barGC

# Preview: print each count at its bar, with the text rotated 90 degrees.
barGC + geom_text(aes(label = counts),
                  position = position_dodge(width = 0.9),
                  angle = 90,
                  hjust = -0.5)

# Preview: the same chart using the "Paired" ColorBrewer palette.
barGC + scale_fill_brewer(palette = "Paired")

reorder by race

# Fix an explicit level order for race; the fill groups are drawn in this
# order.
RaceHIC_DF$race <- factor(RaceHIC_DF$race,
                          levels = c("white",
                                     "black/african american/negro",
                                     "other asian or pacific islander",
                                     "other race, nec",
                                     "two major races",
                                     "chinese",
                                     "american indian or alaska native",
                                     "three or more major races",
                                     "japanese"))
library(ggplot2)
# Rebuild the canvas so it picks up the re-levelled factor.
base1 <- ggplot(data = RaceHIC_DF,
                aes(x = HICstatus,
                    y = counts,
                    fill = race))

barGC <- base1 + geom_bar(stat = "identity",
                          position = "dodge")
# labels
barGC <- barGC + geom_text(position = position_dodge(width = 0.9),
                           angle = 90,
                           hjust = -0.5,
                           aes(label = counts))
# Palette with ordering. The fill encodes race, so the legend is titled "Race".
barGC <- barGC + scale_fill_brewer(name = "Race",
                                   palette = "BuPu",
                                   direction = -1)
barGC

go from dodge to stack:

# Stacked bars with the count printed in the middle of every segment.
# Label colour depends on the row's race: "grey80" for white and
# black/african american/negro, "grey50" for every other category.
conditionColor <- ifelse(
  RaceHIC_DF$race %in% c("white", "black/african american/negro"),
  "grey80",
  "grey50"
)
barStacked <- base1 +
  geom_col(position = "stack") +
  geom_text(aes(label = counts),      # the text layer needs its own aes()
            position = position_stack(vjust = 0.5),
            color = conditionColor,
            fontface = "bold",
            size = 5)
barStacked + scale_fill_brewer(palette = "GnBu",
                               direction = -1)

stacked percent

library(scales) 

# Canvas for the 100%-stacked chart: coverage status on x, counts on y,
# filled by race.
base2 <- ggplot(data = RaceHIC_DF,
                aes(fill = race, y = counts, x = HICstatus))

# Build on base2 (defined just above) so this section is self-contained;
# position = "fill" rescales every stack to the same total height.
barStackPct <- base2 + geom_bar(stat = "identity",
                                position = "fill")

# Label each segment with its column share, formatted to one decimal.
barStackPct1 <- barStackPct + geom_text(size = 5,
                                        position = position_fill(vjust = 0.5),
                                        aes(label = percent(pctCol, accuracy = 0.1)))

barStackPct1

# Variant with smaller labels, a fixed 10:1 axis ratio and y breaks every 0.1.
barStackPct2 <- barStackPct +
  geom_text(aes(label = percent(pctCol, accuracy = 0.1)),
            position = position_fill(vjust = 0.5),
            size = 1) +
  coord_fixed(ratio = 10 / 1) +
  scale_y_continuous(breaks = seq(0, 1, 0.1))
barStackPct2

The chart cannot show all of the labels. It seems this is the “bad idea” introduced in class, so I will have to present the table in a different way.

try heatplot

# Heat map: coverage status on x, race on y (levels ordered with
# reorder(race, pctCol)), tile shade driven by the column share times 100.
base <- ggplot(RaceHIC_DF,
               aes(x = HICstatus,
                   y = reorder(race, pctCol),
                   fill = pctCol * 100))
heat <- base +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +  # colour intensity
  theme_classic()

heat

# improve heat plot

# Axis titles. labs() assigns labels by argument name, so the x title must be
# named explicitly for it to be applied.
heat <- heat + labs(x = "Health Insurance Coverage Status",
                    y = "Race")
# Legend on top, horizontal, without a title, with 1 cm keys.
heat <- heat + theme(axis.text.x = element_text(angle = 0,
                                                vjust = 0.6),
                     legend.title = element_blank(), # no legend title
                     legend.position = "top",
                     legend.direction = "horizontal",
                     legend.key.width = unit(1, "cm"),
                     legend.key.height = unit(1, "cm"))

heat

# Text elements of the heat plot, kept together in one list.
TitleRaceHIC <- list(
  Title = "Heat Plot-Race and Health Insurance Coverage in the U.S.",
  SubTi = NULL,
  XTi = "Health Insurance Coverage Status",
  YTi = "Race",
  Sou = "Source: IPUMS USA"
)
# Attach title, axis titles and caption.
heat2 <- heat +
  labs(title = TitleRaceHIC[["Title"]],
       subtitle = TitleRaceHIC[["SubTi"]],
       x = TitleRaceHIC[["XTi"]],
       y = TitleRaceHIC[["YTi"]],
       caption = TitleRaceHIC[["Sou"]])
heat2

# heat3: center the title and right-align the caption.
heat3 <- heat2 +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1))

heat3

The pattern is still hard to see. Should I exclude “white”? Or is there some other way to redesign this plot?

try contingency table

# Plot a representation of the contingency table: one point per
# race x coverage cell, sized by the column share, with the share printed
# next to it.
library(ggplot2)
# Race goes on the y axis so that every point can be read as a table cell
# (row = race, column = coverage status).
base3 <- ggplot(RaceHIC_DF, aes(x = HICstatus, y = race))
tablePlot <- base3 + geom_point(aes(size = pctCol * 100))
tablePlot <- tablePlot + geom_text(aes(label = percent(pctCol)),
                                   nudge_x = 0.15,  # shift text right of the point
                                   size = 3)
tablePlot

# Cleaner look: minimal theme and no legend.
tablePlot <- tablePlot +
  theme_minimal() +
  theme(legend.position = "none")

tablePlot

# Simpler bar chart: race on x, counts on y, minimal theme.
base4 <- ggplot(RaceHIC_DF, aes(x = race, y = counts))
bars <- base4 + geom_col() + theme_minimal()

# One panel per health insurance status, laid out as columns.
barsFa <- bars + facet_grid(. ~ HICstatus)

barsFa

# Adjust the minimal theme: rotate the x labels by 90 degrees and shrink them.
barsFa <- barsFa +
  theme(axis.text.x = element_text(size = 7,
                                   angle = 90,
                                   hjust = 1))
barsFa

# Input-Output view: column share per race, one panel per coverage status,
# with the axes flipped.
base5 <- ggplot(RaceHIC_DF, aes(x = race, y = pctCol))
barsIO <- base5 +
  geom_col() +
  facet_grid(. ~ HICstatus) +
  coord_flip()

barsIO

# Introduce reorder(): the race levels are ordered by their pctCol values.
base5b <- ggplot(RaceHIC_DF,
                 aes(x = reorder(race, pctCol),  # ordering happens here
                     y = pctCol))

barsIOb <- base5b +
  geom_col() +
  facet_grid(. ~ HICstatus) +
  coord_flip() +
  theme(axis.text.y = element_text(size = 7, angle = 45))

barsIOb

Nice try. Next step is to: add titles

# Text elements of the bivariate plot, kept together in one list.
TitleRaHI <- list(Title = " Health Insurance Coverage Status by Race in the United States",
                  SubTi = NULL,
                  XTi = "Race",
                  YTi = "Percentage, Health Insurance Coverage Status",
                  Sou = "Source: 2019 American Community Survey (ACS) Data")
# Attach title, axis titles and caption.
barsIObTi <- barsIOb + labs(title = TitleRaHI$Title,
                            subtitle = TitleRaHI$SubTi,
                            x = TitleRaHI$XTi,
                            y = TitleRaHI$YTi,
                            caption = TitleRaHI$Sou)
# pctCol is stored as a 0-1 proportion while the axis is titled "Percentage",
# so format the tick labels as percentages to match the title.
barsIObTi <- barsIObTi + scale_y_continuous(labels = scales::percent)
barsIObTi

title positions

# Center the title and right-align the source caption
barsIObTi1 <- barsIObTi +
  theme(plot.title   = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1))

Final Bivariate Plot

# Display the finished plot (titled bars with adjusted title/caption positions)
barsIObTi1

PUBPOL543-Map: Visualization of Poverty Rate by State in the United States

Menghan Zhai

Process ACSData

# Peek at the first 40 values of the poverty variable
head(ACSData[["poverty"]], n = 40)
##  [1]   0   0   0   0  81   0   0   0  54   0   0   0   0 142   0   0   0 150   0
## [20] 109   0 457   0   0 112   1   0 169 135   0  84  46   0  20   1   0 122  27
## [39] 204 104

Generate a new variable, povertyst, indicating whether a person is in poverty. According to IPUMS USA, poverty < 100 represents “in poverty” status, and poverty >= 100 is regarded as “not in poverty”.

# Show the column before it is created (prints NULL on a first run)
ACSData[["povertyst"]]
## NULL
# Flag people below the poverty line: TRUE when poverty < 100
# NOTE(review): IPUMS USA documents POVERTY code 0 as "N/A"; such rows also
# come out TRUE here -- confirm how they should be treated in the rates.
ACSData[["povertyst"]] <- ACSData[["poverty"]] < 100
# 1/0 version of the flag, used to calculate the poverty rate by state
ACSData[["povertyst_num"]] <- as.numeric(ACSData[["povertyst"]])
head(ACSData[["povertyst_num"]], n = 20)
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0
head(ACSData, n = 5) # check
##   hhwt numprec hhtype   adjust                 region statefip countyfip
## 1   58       1    n/a 1.010145 east south central div  alabama         0
## 2   64       1    n/a 1.010145 east south central div  alabama         0
## 3   54       1    n/a 1.010145 east south central div  alabama         0
## 4   75       1    n/a 1.010145 east south central div  alabama         0
## 5   26       1    n/a 1.010145 east south central div  alabama         3
##                                                                        metro
## 1 in metropolitan area: central/principal city status indeterminable (mixed)
## 2 in metropolitan area: central/principal city status indeterminable (mixed)
## 3                                                   not in metropolitan area
## 4                            in metropolitan area: in central/principal city
## 5 in metropolitan area: central/principal city status indeterminable (mixed)
##                                       city citypop ownershp mortgage acrehous
## 1 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 2 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 3 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 4 not in identifiable city (or size group)       0      n/a      n/a      n/a
## 5 not in identifiable city (or size group)       0      n/a      n/a      n/a
##   mortamt1 mortamt2 taxincl insincl propinsr owncost rentgrs costelec costgas
## 1        0        0     n/a     n/a        0   99999       0        0       0
## 2        0        0     n/a     n/a        0   99999       0        0       0
## 3        0        0     n/a     n/a        0   99999       0        0       0
## 4        0        0     n/a     n/a        0   99999       0        0       0
## 5        0        0     n/a     n/a        0   99999       0        0       0
##   costwatr costfuel hhincome foodstmp  valueh builtyr2 unitsstr phone vehicles
## 1        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 2        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 3        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 4        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
## 5        0        0  9999999       no 9999999      n/a      n/a   n/a      n/a
##                                           ssmc perwt                 famsize
## 1 households without a same-sex married couple    58 1 family member present
## 2 households without a same-sex married couple    64 1 family member present
## 3 households without a same-sex married couple    54 1 family member present
## 4 households without a same-sex married couple    75 1 family member present
## 5 households without a same-sex married couple    26 1 family member present
##      sex age                marst birthyr         marrno yrmarr
## 1   male  37 never married/single    1982 not applicable      0
## 2   male  19 never married/single    2000 not applicable      0
## 3 female  87              widowed    1932   married once   1953
## 4   male  26 never married/single    1993 not applicable      0
## 5   male  49 never married/single    1970 not applicable      0
##                           race                        raced       hispan
## 1                        white                        white not hispanic
## 2                        white                        white not hispanic
## 3                        white                        white not hispanic
## 4 black/african american/negro black/african american/negro not hispanic
## 5                        white                        white not hispanic
##        hispand            bpl           bpld citizen yrnatur yrimmig yrsusa1
## 1 not hispanic        indiana        indiana     n/a     n/a       0       0
## 2 not hispanic        alabama        alabama     n/a     n/a       0       0
## 3 not hispanic        alabama        alabama     n/a     n/a       0       0
## 4 not hispanic north carolina north carolina     n/a     n/a       0       0
## 5 not hispanic        alabama        alabama     n/a     n/a       0       0
##   language                 speakeng                   tribe racamind racasian
## 1  english yes, speaks only english not applicable or blank       no       no
## 2  english yes, speaks only english not applicable or blank       no       no
## 3  english yes, speaks only english not applicable or blank       no       no
## 4  english yes, speaks only english not applicable or blank       no       no
## 5  english yes, speaks only english not applicable or blank       no       no
##   racblk racpacis racwht racother                        hcovany
## 1     no       no    yes       no   no health insurance coverage
## 2     no       no    yes       no with health insurance coverage
## 3     no       no    yes       no with health insurance coverage
## 4    yes       no     no       no   no health insurance coverage
## 5     no       no    yes       no with health insurance coverage
##                                    hcovpriv
## 1 without private health insurance coverage
## 2    with private health insurance coverage
## 3    with private health insurance coverage
## 4 without private health insurance coverage
## 5 without private health insurance coverage
##                                hinsemp                          hinspur
## 1  no insurance through employer/union  no insurance purchased directly
## 2 has insurance through employer/union  no insurance purchased directly
## 3  no insurance through employer/union has insurance purchased directly
## 4  no insurance through employer/union  no insurance purchased directly
## 5  no insurance through employer/union  no insurance purchased directly
##                        hinstri                                  hcovpub
## 1 no insurance through tricare without public health insurance coverage
## 2 no insurance through tricare without public health insurance coverage
## 3 no insurance through tricare    with public health insurance coverage
## 4 no insurance through tricare without public health insurance coverage
## 5 no insurance through tricare    with public health insurance coverage
##                         hinscaid hinscare                  hinsva
## 1  no insurance through medicaid       no no insurance through va
## 2  no insurance through medicaid       no no insurance through va
## 3 has insurance through medicaid      yes no insurance through va
## 4  no insurance through medicaid       no no insurance through va
## 5 has insurance through medicaid       no no insurance through va
##                                      hinsihs            school
## 1 no insurance through indian health service no, not in school
## 2 no insurance through indian health service    yes, in school
## 3 no insurance through indian health service no, not in school
## 4 no insurance through indian health service no, not in school
## 5 no insurance through indian health service no, not in school
##                  educ                                        educd
## 1   1 year of college 1 or more years of college credit, no degree
## 2   1 year of college 1 or more years of college credit, no degree
## 3            grade 12                  regular high school diploma
## 4             grade 9                                      grade 9
## 5 grade 5, 6, 7, or 8                                      grade 8
##        schltype degfield degfieldd degfield2 degfield2d            empstat
## 1  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 2 public school      n/a       n/a       n/a        n/a           employed
## 3  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 4  not enrolled      n/a       n/a       n/a        n/a not in labor force
## 5  not enrolled      n/a       n/a       n/a        n/a           employed
##             empstatd                   labforce wkswork1    wkswork2 uhrswork
## 1 not in labor force no, not in the labor force       16 14-26 weeks       55
## 2            at work    yes, in the labor force       26 14-26 weeks       40
## 3 not in labor force no, not in the labor force        0         n/a      n/a
## 4 not in labor force no, not in the labor force       48 48-49 weeks       40
## 5            at work    yes, in the labor force       11  1-13 weeks       21
##   inctot ftotinc incwage incss incwelfr incinvst incretir incsupp incother
## 1  15800 9999999   15800     0        0        0        0       0        0
## 2    800 9999999     800     0        0        0        0       0        0
## 3  13800 9999999       0 13800        0        0        0       0        0
## 4  23500 9999999   23500     0        0        0        0       0        0
## 5  10700 9999999    1700     0        0        0        0    9000        0
##   incearn poverty                  diffrem                  diffphys
## 1   15800       0 has cognitive difficulty  no ambulatory difficulty
## 2     800       0  no cognitive difficulty  no ambulatory difficulty
## 3       0       0 has cognitive difficulty has ambulatory difficulty
## 4   23500       0  no cognitive difficulty  no ambulatory difficulty
## 5    1700      81 has cognitive difficulty has ambulatory difficulty
##                             diffmob diffcare                        diffsens
## 1  no independent living difficulty       no no vision or hearing difficulty
## 2  no independent living difficulty       no no vision or hearing difficulty
## 3 has independent living difficulty      yes no vision or hearing difficulty
## 4  no independent living difficulty       no no vision or hearing difficulty
## 5 has independent living difficulty      yes no vision or hearing difficulty
##   diffeye diffhear pwstate2 pwcounty            tranwork      carpool
## 1      no       no      n/a        0                 n/a          n/a
## 2      no       no  alabama        0 auto, truck, or van drives alone
## 3      no       no      n/a        0                 n/a          n/a
## 4      no       no      n/a        0                 n/a          n/a
## 5      no       no  alabama        3 auto, truck, or van     carpools
##         riders trantime departs arrives povertyst povertyst_num
## 1          n/a        0       0       0      TRUE             1
## 2 drives alone       15     902     919      TRUE             1
## 3          n/a        0       0       0      TRUE             1
## 4          n/a        0       0       0      TRUE             1
## 5            3       15     702     719      TRUE             1
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build the per-person table (state + 1/0 poverty flag) used for the state
# poverty rates.
# IPUMS USA codes POVERTY == 0 as "N/A" (poverty status not determined), so
# those people are left out here; otherwise `poverty < 100` counts every one
# of them as poor and inflates each state's rate.
in_universe <- !is.na(ACSData$poverty) & ACSData$poverty > 0
# Column names are spelled out so they stay exactly what data.frame()
# generated before (ACSData.statefip, ACSData.povertyst_num).
ACSData1 <- data.frame(
  ACSData.statefip      = ACSData$statefip[in_universe],
  ACSData.povertyst_num = ACSData$povertyst_num[in_universe]
)
head(ACSData1,5)
##   ACSData.statefip ACSData.povertyst_num
## 1          alabama                     1
## 2          alabama                     1
## 3          alabama                     1
## 4          alabama                     1
## 5          alabama                     1

Calculate the percentage of people “in poverty” in each state

Generate a “count” variable to count the total number of persons in each state

# The column does not exist yet, so this prints NULL on a first run
ACSData1[["count"]]
## NULL
# A constant 1 on every row, so that sum(count) gives the rows per state
ACSData1[["count"]] <- 1
head(ACSData1, n = 5)
##   ACSData.statefip ACSData.povertyst_num count
## 1          alabama                     1     1
## 2          alabama                     1     1
## 3          alabama                     1     1
## 4          alabama                     1     1
## 5          alabama                     1     1

calculate

library(dplyr)
# Within each state, attach the share of rows flagged as in poverty
# (flag total divided by count total); every row of a state gets the same value
ACSData2 <- ACSData1 %>%
  group_by(ACSData.statefip) %>%
  mutate(percent = sum(ACSData.povertyst_num) / sum(count))
head(ACSData2, n = 5)
## # A tibble: 5 x 4
## # Groups:   ACSData.statefip [1]
##   ACSData.statefip ACSData.povertyst_num count percent
##   <fct>                            <dbl> <dbl>   <dbl>
## 1 alabama                              1     1   0.197
## 2 alabama                              1     1   0.197
## 3 alabama                              1     1   0.197
## 4 alabama                              1     1   0.197
## 5 alabama                              1     1   0.197

Save the “percent” values to a new data frame with one row per state

# Keep one row per distinct (state, percent) pair, retaining the other columns
ACSData3 <- distinct(ACSData2, ACSData.statefip, percent, .keep_all = TRUE)

head(ACSData3, n = 5)
## # A tibble: 5 x 4
## # Groups:   ACSData.statefip [5]
##   ACSData.statefip ACSData.povertyst_num count percent
##   <fct>                            <dbl> <dbl>   <dbl>
## 1 alabama                              1     1  0.197 
## 2 puerto rico                          1     1  0.501 
## 3 alaska                               1     1  0.0997
## 4 arizona                              1     1  0.131 
## 5 arkansas                             1     1  0.174

generate a new “NAME” variable in ACSData3 which equals the original statefip in ACS data to merge the data and the map.

# Copy the state name into NAME, the key column later used to merge with the map
ACSData3[["NAME"]] <- ACSData3[["ACSData.statefip"]]

head(ACSData3, n = 5)
## # A tibble: 5 x 5
## # Groups:   ACSData.statefip [5]
##   ACSData.statefip ACSData.povertyst_num count percent NAME       
##   <fct>                            <dbl> <dbl>   <dbl> <fct>      
## 1 alabama                              1     1  0.197  alabama    
## 2 puerto rico                          1     1  0.501  puerto rico
## 3 alaska                               1     1  0.0997 alaska     
## 4 arizona                              1     1  0.131  arizona    
## 5 arkansas                             1     1  0.174  arkansas

Capitalize the state names (copied from ACSData3 into the data frame ACSData4) so that NAME is the same as NAME in the map

# Convert the grouped tibble into a plain data frame
ACSData4 <- as.data.frame(ACSData3)
head(ACSData4, n = 5)
##   ACSData.statefip ACSData.povertyst_num count    percent        NAME
## 1          alabama                     1     1 0.19681177     alabama
## 2      puerto rico                     1     1 0.50108932 puerto rico
## 3           alaska                     1     1 0.09967846      alaska
## 4          arizona                     1     1 0.13064323     arizona
## 5         arkansas                     1     1 0.17434508    arkansas
# Character copy of the NAME factor, ready for text manipulation
ACSData4[["NAME_char"]] <- as.character(ACSData4[["NAME"]])
head(ACSData4[["NAME_char"]], n = 5)
## [1] "alabama"     "puerto rico" "alaska"      "arizona"     "arkansas"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Title-case every word of the state name so that multi-word states match the
# map's NAME column ("puerto rico" -> "Puerto Rico", "district of columbia"
# -> "District of Columbia"). Hmisc::capitalize() only upper-cases the first
# letter of the whole string, which left names such as "Puerto rico"
# unmatched and silently dropped those states from the merge with the map.
# tools ships with R, so the namespaced call needs no extra library().
ACSData4$NAME_char <- unname(tools::toTitleCase(ACSData4$NAME_char))
head(ACSData4,5)
##   ACSData.statefip ACSData.povertyst_num count    percent        NAME
## 1          alabama                     1     1 0.19681177     alabama
## 2      puerto rico                     1     1 0.50108932 puerto rico
## 3           alaska                     1     1 0.09967846      alaska
## 4          arizona                     1     1 0.13064323     arizona
## 5         arkansas                     1     1 0.17434508    arkansas
##     NAME_char
## 1     Alabama
## 2 Puerto rico
## 3      Alaska
## 4     Arizona
## 5    Arkansas
# Replace NAME with a factor built from the capitalized names
ACSData4[["NAME"]] <- as.factor(ACSData4[["NAME_char"]])
head(ACSData4, n = 5)
##   ACSData.statefip ACSData.povertyst_num count    percent        NAME
## 1          alabama                     1     1 0.19681177     Alabama
## 2      puerto rico                     1     1 0.50108932 Puerto rico
## 3           alaska                     1     1 0.09967846      Alaska
## 4          arizona                     1     1 0.13064323     Arizona
## 5         arkansas                     1     1 0.17434508    Arkansas
##     NAME_char
## 1     Alabama
## 2 Puerto rico
## 3      Alaska
## 4     Arizona
## 5    Arkansas

Add a new column, “poverty_rate”, so that the variable shown in the map has a meaningful name

# Duplicate percent under the name that will be mapped to the fill color
ACSData4[["poverty_rate"]] <- ACSData4[["percent"]]
head(ACSData4, n = 5)
##   ACSData.statefip ACSData.povertyst_num count    percent        NAME
## 1          alabama                     1     1 0.19681177     Alabama
## 2      puerto rico                     1     1 0.50108932 Puerto rico
## 3           alaska                     1     1 0.09967846      Alaska
## 4          arizona                     1     1 0.13064323     Arizona
## 5         arkansas                     1     1 0.17434508    Arkansas
##     NAME_char poverty_rate
## 1     Alabama   0.19681177
## 2 Puerto rico   0.50108932
## 3      Alaska   0.09967846
## 4     Arizona   0.13064323
## 5    Arkansas   0.17434508

open the map

# Location of the state boundaries file (GeoJSON)
linkMap <- "https://github.com/G7-PUBPOL-543/maps/raw/main/states.geojson"
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
# Read the boundaries into an sf object
mapUS <- read_sf(linkMap)
# preview the data that comes with the map
head(mapUS)
## Simple feature collection with 6 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7318 ymin: 30.35893 xmax: -80.83973 ymax: 49.00236
## geographic CRS: NAD83
## # A tibble: 6 x 4
##   STATEFP STUSPS NAME                                                   geometry
##   <chr>   <chr>  <chr>                                        <MULTIPOLYGON [°]>
## 1 31      NE     Nebraska  (((-104.053 43.00059, -103.6183 43.00068, -103.133 4…
## 2 53      WA     Washingt… (((-122.526 47.35891, -122.514 47.4489, -122.46 47.4…
## 3 35      NM     New Mexi… (((-109.0452 36.99908, -108.646 36.99926, -108.2494 …
## 4 46      SD     South Da… (((-104.0577 44.99743, -104.0397 45.00133, -104.0401…
## 5 21      KY     Kentucky  (((-89.13268 36.9822, -89.16645 37.00334, -89.18251 …
## 6 13      GA     Georgia   (((-85.60516 34.98468, -85.38497 34.98299, -84.93931…
# Inspect the columns and types of the state-level table before merging it with the map
str(ACSData4)
## 'data.frame':    52 obs. of  7 variables:
##  $ ACSData.statefip     : Factor w/ 62 levels "alabama","alaska",..: 1 60 2 3 4 5 6 7 8 9 ...
##  $ ACSData.povertyst_num: num  1 1 1 1 1 1 1 0 1 1 ...
##  $ count                : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ percent              : num  0.1968 0.5011 0.0997 0.1306 0.1743 ...
##  $ NAME                 : Factor w/ 52 levels "Alabama","Alaska",..: 1 40 2 3 4 5 6 7 8 9 ...
##  $ NAME_char            : chr  "Alabama" "Puerto rico" "Alaska" "Arizona" ...
##  $ poverty_rate         : num  0.1968 0.5011 0.0997 0.1306 0.1743 ...

Merge ACS data and U.S. map by state name

# Join the state-level poverty table onto the map by state name;
# with merge()'s defaults only states whose NAME appears in both are kept
mapUSVars <- merge(x = mapUS, y = ACSData4, by = "NAME")

mapUSVars
## Simple feature collection with 40 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -170 ymin: 18.92245 xmax: -66.97626 ymax: 71.35256
## geographic CRS: NAD83
## First 10 features:
##           NAME STATEFP STUSPS ACSData.statefip ACSData.povertyst_num count
## 1      Alabama      01     AL          alabama                     1     1
## 2       Alaska      02     AK           alaska                     1     1
## 3      Arizona      04     AZ          arizona                     1     1
## 4     Arkansas      05     AR         arkansas                     1     1
## 5   California      06     CA       california                     1     1
## 6     Colorado      08     CO         colorado                     1     1
## 7  Connecticut      09     CT      connecticut                     0     1
## 8     Delaware      10     DE         delaware                     1     1
## 9      Florida      12     FL          florida                     1     1
## 10     Georgia      13     GA          georgia                     1     1
##       percent   NAME_char poverty_rate                       geometry
## 1  0.19681177     Alabama   0.19681177 MULTIPOLYGON (((-88.3273 30...
## 2  0.09967846      Alaska   0.09967846 MULTIPOLYGON (((-154.1567 5...
## 3  0.13064323     Arizona   0.13064323 MULTIPOLYGON (((-114.7196 3...
## 4  0.17434508    Arkansas   0.17434508 MULTIPOLYGON (((-89.7331 36...
## 5  0.14502554  California   0.14502554 MULTIPOLYGON (((-118.6044 3...
## 6  0.12091503    Colorado   0.12091503 MULTIPOLYGON (((-109.0501 4...
## 7  0.11671687 Connecticut   0.11671687 MULTIPOLYGON (((-73.48731 4...
## 8  0.16358839    Delaware   0.16358839 MULTIPOLYGON (((-75.7886 39...
## 9  0.13959053     Florida   0.13959053 MULTIPOLYGON (((-80.84849 2...
## 10 0.17073805     Georgia   0.17073805 MULTIPOLYGON (((-85.60516 3...

1.Base layer: Map of U.S.

library(ggplot2)
# Base layer: every state in light grey, no border lines, classic theme
base <- ggplot(data = mapUS) +
  geom_sf(fill = "grey90", color = NA) +
  theme_classic()

base # base map of the U.S.

2.Data layer: Poverty Rate Map by State in the United States

# Data layer: draw the merged states on top, filled according to poverty_rate
povMap <- base +
  geom_sf(data = mapUSVars, mapping = aes(fill = poverty_rate), color = NA)
povMap

change color

# Continuous fill scale running from blue (low values) to yellow (high values)
povMap1 <- povMap +
  scale_fill_gradient(low = "blue", high = "yellow")

add titles

# Text elements for the map (no subtitle and no axis titles)
TitlePovMap <- list(
  Title = " Poverty Rate Map by State in the United States",
  SubTi = NULL,
  XTi   = NULL,
  YTi   = NULL,
  Sou   = "Source: 2019 American Community Survey (ACS) Data"
)
# Attach the labels to the recolored map
povMap2 <- povMap1 +
  labs(title    = TitlePovMap[["Title"]],
       subtitle = TitlePovMap[["SubTi"]],
       x        = TitlePovMap[["XTi"]],
       y        = TitlePovMap[["YTi"]],
       caption  = TitlePovMap[["Sou"]])
povMap2

title positions

# Center the title and right-align the source caption
povMap3 <- povMap2 +
  theme(plot.title   = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 1))

Final Map

# Display the finished map (titled, recolored, with adjusted title/caption positions)
povMap3

That’s all for my final project. Thank you!!